{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "bc892831",
   "metadata": {},
   "outputs": [],
   "source": [
    "import  pandas as pd \n",
    "import seaborn as sns\n",
    "from matplotlib import pyplot as plt\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "868e7150",
   "metadata": {},
   "outputs": [],
   "source": [
    "DA:\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "0d4f3a26",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>text</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>2967 6758 339 2021 1854 3731 4109 3792 4149 15...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>11</td>\n",
       "      <td>4464 486 6352 5619 2465 4802 1452 3137 5778 54...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>7346 4068 5074 3747 5681 6093 1777 2226 7354 6...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>7159 948 4866 2109 5520 2490 211 3956 5520 549...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>3</td>\n",
       "      <td>3646 3055 3055 2490 4659 6065 3370 5814 2465 5...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   label                                               text\n",
       "0      2  2967 6758 339 2021 1854 3731 4109 3792 4149 15...\n",
       "1     11  4464 486 6352 5619 2465 4802 1452 3137 5778 54...\n",
       "2      3  7346 4068 5074 3747 5681 6093 1777 2226 7354 6...\n",
       "3      2  7159 948 4866 2109 5520 2490 211 3956 5520 549...\n",
       "4      3  3646 3055 3055 2490 4659 6065 3370 5814 2465 5..."
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_df = pd.read_csv('./datasets/train_set.csv', sep='\\t')\n",
    "train_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "5e002721",
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import defaultdict\n",
    "from collections import Counter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "0071459c",
   "metadata": {},
   "outputs": [],
   "source": [
    "label_counter = Counter(train_df.label.values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "2d203438",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2 31425 15.712499999999999\n",
      "11 3131 1.5655\n",
      "3 22133 11.0665\n",
      "9 5878 2.939\n",
      "10 4920 2.46\n",
      "12 1821 0.9105\n",
      "0 38918 19.459\n",
      "7 8841 4.4205000000000005\n",
      "4 15016 7.507999999999999\n",
      "1 36945 18.4725\n",
      "6 9985 4.9925\n",
      "5 12232 6.116\n",
      "8 7847 3.9234999999999998\n",
      "13 908 0.45399999999999996\n"
     ]
    }
   ],
   "source": [
    "for _id, value in label_counter.items():\n",
    "    print(_id, value, value / len(train_df) * 100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "cae23430",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Counter({2: 31425,\n",
       "         11: 3131,\n",
       "         3: 22133,\n",
       "         9: 5878,\n",
       "         10: 4920,\n",
       "         12: 1821,\n",
       "         0: 38918,\n",
       "         7: 8841,\n",
       "         4: 15016,\n",
       "         1: 36945,\n",
       "         6: 9985,\n",
       "         5: 12232,\n",
       "         8: 7847,\n",
       "         13: 908})"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the raw Counter; entries appear in first-seen order, not sorted by label or count.\n",
    "label_counter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "c37db4cf",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     38918\n",
       "1     36945\n",
       "2     31425\n",
       "3     22133\n",
       "4     15016\n",
       "5     12232\n",
       "6      9985\n",
       "7      8841\n",
       "8      7847\n",
       "9      5878\n",
       "10     4920\n",
       "11     3131\n",
       "12     1821\n",
       "13      908\n",
       "Name: label, dtype: int64"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_df.label.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "480a6eb2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2508505178192"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Debugging leftover (disabled): id(train_df.label)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "7966df7e",
   "metadata": {},
   "outputs": [],
   "source": [
    "train_df['text_len']= train_df['text'].apply(len)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "1d50b1dd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>text</th>\n",
       "      <th>text_len</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>2967 6758 339 2021 1854 3731 4109 3792 4149 15...</td>\n",
       "      <td>5120</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>11</td>\n",
       "      <td>4464 486 6352 5619 2465 4802 1452 3137 5778 54...</td>\n",
       "      <td>2328</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>7346 4068 5074 3747 5681 6093 1777 2226 7354 6...</td>\n",
       "      <td>3702</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>7159 948 4866 2109 5520 2490 211 3956 5520 549...</td>\n",
       "      <td>7622</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>3</td>\n",
       "      <td>3646 3055 3055 2490 4659 6065 3370 5814 2465 5...</td>\n",
       "      <td>1494</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   label                                               text  text_len\n",
       "0      2  2967 6758 339 2021 1854 3731 4109 3792 4149 15...      5120\n",
       "1     11  4464 486 6352 5619 2465 4802 1452 3137 5778 54...      2328\n",
       "2      3  7346 4068 5074 3747 5681 6093 1777 2226 7354 6...      3702\n",
       "3      2  7159 948 4866 2109 5520 2490 211 3956 5520 549...      7622\n",
       "4      3  3646 3055 3055 2490 4659 6065 3370 5814 2465 5...      1494"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "c87e7d65",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4840.2915674173255"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.std(train_df.text_len)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "41ab8cef",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([1.99888e+05, 8.00000e+01, 1.60000e+01, 1.10000e+01, 5.00000e+00]),\n",
       " array([9.000000e+00, 5.671320e+04, 1.134174e+05, 1.701216e+05,\n",
       "        2.268258e+05, 2.835300e+05]),\n",
       " <BarContainer object of 5 artists>)"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYoAAAD4CAYAAADy46FuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAXoUlEQVR4nO3df6xfdZ3n8edrW2GNylCk2zRt3aJ2Jqlkt0KjnYwaV1Yo7GaKG9Ytf9iOQ6wukGhmNmsZ/8A4kuhs1ISsorg0lInyY0RDM1u2dhkyZpItclGm/FDsBSG0KbRShNl1Fgd97x/fz509vX7v6eX++PYHz0dy8j3f9/l8zvl8+F6+r37POfd+U1VIkjSVf3K8ByBJOrEZFJKkXgaFJKmXQSFJ6mVQSJJ6LTzeA5hrZ599dq1cufJ4D0OSTioPPPDAz6pq8bBtp1xQrFy5krGxseM9DEk6qSR5aqptnnqSJPUyKCRJvQwKSVIvg0KS1MugkCT1OmZQJFmR5N4kjyZ5JMnHW/2sJLuT7GuPi1o9Sa5PMp5kb5LzOvva3NrvS7K5Uz8/yUOtz/VJ0ncMSdLoTOcTxcvAH1fVamAdcFWS1cBW4J6qWgXc054DXAysassW4AYYvOkD1wLvBN4BXNt5478B+Ein3/pWn+oYkqQROWZQVNXBqvpBW/874EfAMmADsL012w5c2tY3ALfUwB7gzCRLgYuA3VV1pKqeB3YD69u2M6pqTw3+5vktk/Y17BiSpBF5RdcokqwE3g7cByypqoNt0zPAkra+DHi6021/q/XV9w+p03MMSdKITPs3s5O8HrgT+ERVvdguIwBQVZVkXr8Bqe8YSbYwOM3Fm970phkfY+XW/z7jvierJz/3b473ECSd4Kb1iSLJaxiExDeq6tut/Gw7bUR7PNTqB4AVne7LW62vvnxIve8YR6mqG6tqbVWtXbx46J8qkSTN0HTuegpwE/CjqvpiZ9MOYOLOpc3AXZ36pnb30zrghXb6aBdwYZJF7SL2hcCutu3FJOvasTZN2tewY0iSRmQ6p55+D/gQ8FCSB1vtT4DPAXckuQJ4Cvhg27YTuAQYB34BfBigqo4k+VPg/tbuM1V1pK1fCdwMvBa4uy30HEOSNCLHDIqq+hsgU2y+YEj7Aq6aYl/bgG1D6mPAuUPqzw07hiRpdPzNbElSL4NCktTLoJAk9TIoJEm9DApJUi+DQpLUy6CQJPUyKCRJvQwKSVIvg0KS1MugkCT1MigkSb0MCklSL4NCktTLoJAk9TIoJEm9DApJUq/pfGf2tiSHkjzcqd2e5MG2PDnxFalJVib5+862r3b6nJ/koSTjSa5v349NkrOS7E6yrz0uavW0duNJ9iY5b85nL0k6pul8orgZWN8tVNV/qKo1VbUGuBP4dmfz4xPbqupjnfoNwEeAVW2Z2OdW4J6qWgXc054DXNxpu6X1lySN2DGDoqq+BxwZtq19KvggcGvfPpIsBc6oqj3tO7VvAS5tmzcA29v69kn1W2pgD3Bm248kaYRme43i3cCzVbWvUzsnyQ+T/HWSd7faMmB/p83+VgNYUlUH2/ozwJJOn6en6HOUJFuSjCUZO3z48CymI0mabLZBcTlHf5o4CLypqt4O/BHwzSRnTHdn7dNGvdJBVNWNVbW2qtYuXrz4lXaXJPVYONOOSRYC/w44f6JWVS8BL7X1B5I8Dvw2cABY3um+vNUAnk2ytKoOtlNLh1r9ALBiij6SpBGZzSeKfw38uKr+8ZRSksVJFrT1NzO4EP1EO7X0YpJ17brGJuCu1m0HsLmtb55U39TufloHvNA5RSVJGpHp3B57K/C/gN9Jsj/JFW3TRn7zIvZ7gL3tdtlvAR+rqokL4VcC/w0YBx4H7m71zwHvT7KPQfh8rtV3Ak+09l9v/SVJI3bMU09VdfkU9T8YUruTwe2yw9qPAecOqT8HXDCkXsBVxxqfJGl++ZvZkqReBoUkqZdB
IUnqZVBIknoZFJKkXgaFJKmXQSFJ6mVQSJJ6GRSSpF4GhSSpl0EhSeplUEiSehkUkqReBoUkqZdBIUnqZVBIknoZFJKkXtP5KtRtSQ4lebhT+3SSA0kebMslnW3XJBlP8liSizr19a02nmRrp35Okvta/fYkp7X66e35eNu+cs5mLUmatul8orgZWD+k/qWqWtOWnQBJVjP4Lu23tT5fSbIgyQLgy8DFwGrg8tYW4PNtX28FngcmvpP7CuD5Vv9SaydJGrFjBkVVfQ84Ms39bQBuq6qXquqnwDjwjraMV9UTVfVL4DZgQ5IA7wO+1fpvBy7t7Gt7W/8WcEFrL0kaodlco7g6yd52ampRqy0Dnu602d9qU9XfCPy8ql6eVD9qX237C639b0iyJclYkrHDhw/PYkqSpMlmGhQ3AG8B1gAHgS/M1YBmoqpurKq1VbV28eLFx3MoknTKmVFQVNWzVfWrqvo18HUGp5YADgArOk2Xt9pU9eeAM5MsnFQ/al9t+2+19pKkEZpRUCRZ2nn6AWDijqgdwMZ2x9I5wCrg+8D9wKp2h9NpDC5476iqAu4FLmv9NwN3dfa1ua1fBvxVay9JGqGFx2qQ5FbgvcDZSfYD1wLvTbIGKOBJ4KMAVfVIkjuAR4GXgauq6ldtP1cDu4AFwLaqeqQd4pPAbUk+C/wQuKnVbwL+PMk4g4vpG2c7WUnSK3fMoKiqy4eUbxpSm2h/HXDdkPpOYOeQ+hP8/1NX3fr/Bf79scYnSZpf/ma2JKmXQSFJ6mVQSJJ6GRSSpF4GhSSpl0EhSeplUEiSehkUkqReBoUkqZdBIUnqZVBIknoZFJKkXgaFJKmXQSFJ6mVQSJJ6GRSSpF4GhSSp1zGDIsm2JIeSPNyp/ZckP06yN8l3kpzZ6iuT/H2SB9vy1U6f85M8lGQ8yfVJ0upnJdmdZF97XNTqae3G23HOm/PZS5KOaTqfKG4G1k+q7QbOrap/AfwEuKaz7fGqWtOWj3XqNwAfAVa1ZWKfW4F7qmoVcE97DnBxp+2W1l+SNGLHDIqq+h5wZFLtu1X1cnu6B1jet48kS4EzqmpPVRVwC3Bp27wB2N7Wt0+q31IDe4Az234kSSM0F9co/hC4u/P8nCQ/TPLXSd7dasuA/Z02+1sNYElVHWzrzwBLOn2enqLPUZJsSTKWZOzw4cOzmIokabJZBUWSTwEvA99opYPAm6rq7cAfAd9McsZ099c+bdQrHUdV3VhVa6tq7eLFi19pd0lSj4Uz7ZjkD4B/C1zQ3uCpqpeAl9r6A0keB34bOMDRp6eWtxrAs0mWVtXBdmrpUKsfAFZM0UeSNCIz+kSRZD3wn4Hfr6pfdOqLkyxo629mcCH6iXZq6cUk69rdTpuAu1q3HcDmtr55Un1Tu/tpHfBC5xSVJGlEjvmJIsmtwHuBs5PsB65lcJfT6cDudpfrnnaH03uAzyT5B+DXwMeqauJC+JUM7qB6LYNrGhPXNT4H3JHkCuAp4IOtvhO4BBgHfgF8eDYTlSTNzDGDoqouH1K+aYq2dwJ3TrFtDDh3SP054IIh9QKuOtb4JEnzy9/MliT1MigkSb0MCklSL4NCktTLoJAk9TIoJEm9DApJUi+DQpLUy6CQJPUyKCRJvQwKSVIvg0KS1MugkCT1MigkSb0MCklSL4NCktTLoJAk9ZpWUCTZluRQkoc7tbOS7E6yrz0uavUkuT7JeJK9Sc7r9Nnc2u9LsrlTPz/JQ63P9e17tac8hiRpdKb7ieJmYP2k2lbgnqpaBdzTngNcDKxqyxbgBhi86TP4vu13Au8Aru288d8AfKTTb/0xjiFJGpFpBUVVfQ84Mqm8Adje1rcDl3bqt9TAHuDMJEuBi4DdVXWkqp4HdgPr27YzqmpP+57sWybta9gxJEkjMptrFEuq6mBbfwZY0taXAU932u1vtb76/iH1vmMcJcmWJGNJxg4fPjzD6UiShpmTi9ntk0DNxb5mcoyqurGq1lbV2sWL
F8/nMCTpVWc2QfFsO21EezzU6geAFZ12y1utr758SL3vGJKkEZlNUOwAJu5c2gzc1alvanc/rQNeaKePdgEXJlnULmJfCOxq215Msq7d7bRp0r6GHUOSNCILp9Moya3Ae4Gzk+xncPfS54A7klwBPAV8sDXfCVwCjAO/AD4MUFVHkvwpcH9r95mqmrhAfiWDO6teC9zdFnqOIUkakWkFRVVdPsWmC4a0LeCqKfazDdg2pD4GnDuk/tywY0iSRsffzJYk9TIoJEm9DApJUi+DQpLUy6CQJPUyKCRJvQwKSVIvg0KS1MugkCT1MigkSb0MCklSL4NCktTLoJAk9TIoJEm9DApJUi+DQpLUy6CQJPWacVAk+Z0kD3aWF5N8Ismnkxzo1C/p9LkmyXiSx5Jc1Kmvb7XxJFs79XOS3Nfqtyc5beZTlSTNxIyDoqoeq6o1VbUGOJ/B92N/p23+0sS2qtoJkGQ1sBF4G7Ae+EqSBUkWAF8GLgZWA5e3tgCfb/t6K/A8cMVMxytJmpm5OvV0AfB4VT3V02YDcFtVvVRVPwXGgXe0ZbyqnqiqXwK3ARuSBHgf8K3Wfztw6RyNV5I0TXMVFBuBWzvPr06yN8m2JItabRnwdKfN/labqv5G4OdV9fKk+m9IsiXJWJKxw4cPz342kqR/NOugaNcNfh/4i1a6AXgLsAY4CHxhtsc4lqq6sarWVtXaxYsXz/fhJOlVZeEc7ONi4AdV9SzAxCNAkq8Df9meHgBWdPotbzWmqD8HnJlkYftU0W0vSRqRuTj1dDmd005Jlna2fQB4uK3vADYmOT3JOcAq4PvA/cCqdofTaQxOY+2oqgLuBS5r/TcDd83BeCVJr8CsPlEkeR3wfuCjnfKfJVkDFPDkxLaqeiTJHcCjwMvAVVX1q7afq4FdwAJgW1U90vb1SeC2JJ8FfgjcNJvxSpJeuVkFRVX9HwYXnbu1D/W0vw64bkh9J7BzSP0JBndFSZKOE38zW5LUy6CQJPUyKCRJvQwKSVIvg0KS1MugkCT1MigkSb0MCklSL4NCktTLoJAk9TIoJEm9DApJUi+DQpLUy6CQJPUyKCRJvQwKSVIvg0KS1GvWQZHkySQPJXkwyVirnZVkd5J97XFRqyfJ9UnGk+xNcl5nP5tb+31JNnfq57f9j7e+me2YJUnTN1efKP5VVa2pqrXt+VbgnqpaBdzTngNcDKxqyxbgBhgEC3At8E4GX3167US4tDYf6fRbP0djliRNw3ydetoAbG/r24FLO/VbamAPcGaSpcBFwO6qOlJVzwO7gfVt2xlVtaeqCrilsy9J0gjMRVAU8N0kDyTZ0mpLqupgW38GWNLWlwFPd/rub7W++v4h9aMk2ZJkLMnY4cOHZzsfSVLHwjnYx7uq6kCSfwbsTvLj7saqqiQ1B8eZUlXdCNwIsHbt2nk9liS92sz6E0VVHWiPh4DvMLjG8Gw7bUR7PNSaHwBWdLovb7W++vIhdUnSiMwqKJK8LskbJtaBC4GHgR3AxJ1Lm4G72voOYFO7+2kd8EI7RbULuDDJonYR+0JgV9v2YpJ17W6nTZ19SZJGYLannpYA32l3rC4EvllV/yPJ/cAdSa4AngI+2NrvBC4BxoFfAB8GqKojSf4UuL+1+0xVHWnrVwI3A68F7m6LJGlEZhUUVfUE8C+H1J8DLhhSL+CqKfa1Ddg2pD4GnDubcUqSZs7fzJYk9TIoJEm9DApJUi+DQpLUy6CQJPUyKCRJvQwKSVIvg0KS1MugkCT1MigkSb0MCklSL4NCktTLoJAk9TIoJEm9DApJUi+DQpLUy6CQJPWacVAkWZHk3iSPJnkkycdb/dNJDiR5sC2XdPpck2Q8yWNJLurU17faeJKtnfo5Se5r9duTnDbT8UqSZmY2nyheBv64qlYD64Crkqxu275UVWvashOgbdsIvA1YD3wlyYIkC4AvAxcDq4HLO/v5fNvXW4HngStmMV5J0gzMOCiq6mBV/aCt/x3w
I2BZT5cNwG1V9VJV/RQYB97RlvGqeqKqfgncBmxIEuB9wLda/+3ApTMdryRpZubkGkWSlcDbgfta6eoke5NsS7Ko1ZYBT3e67W+1qepvBH5eVS9Pqg87/pYkY0nGDh8+PBdTkiQ1sw6KJK8H7gQ+UVUvAjcAbwHWAAeBL8z2GMdSVTdW1dqqWrt48eL5PpwkvaosnE3nJK9hEBLfqKpvA1TVs53tXwf+sj09AKzodF/eakxRfw44M8nC9qmi216SNCKzuespwE3Aj6rqi5360k6zDwAPt/UdwMYkpyc5B1gFfB+4H1jV7nA6jcEF7x1VVcC9wGWt/2bgrpmOV5I0M7P5RPF7wIeAh5I82Gp/wuCupTVAAU8CHwWoqkeS3AE8yuCOqauq6lcASa4GdgELgG1V9Ujb3yeB25J8Fvghg2CSJI3QjIOiqv4GyJBNO3v6XAdcN6S+c1i/qnqCwV1RkqTjxN/MliT1MigkSb0MCklSL4NCktTLoJAk9TIoJEm9DApJUi+DQpLUy6CQJPUyKCRJvQwKSVIvg0KS1MugkCT1MigkSb0MCklSL4NCktTLoJAk9TrhgyLJ+iSPJRlPsvV4j0eSXm1O6KBIsgD4MnAxsJrB93GvPr6jkqRXlxM6KBh8X/Z4VT1RVb8EbgM2HOcxSdKrysLjPYBjWAY83Xm+H3jn5EZJtgBb2tP/neSxGR7vbOBnM+x7IptyXvn8iEcyt07F1+tUnBM4r5PBP59qw4keFNNSVTcCN852P0nGqmrtHAzphOK8Th6n4pzAeZ3sTvRTTweAFZ3ny1tNkjQiJ3pQ3A+sSnJOktOAjcCO4zwmSXpVOaFPPVXVy0muBnYBC4BtVfXIPB5y1qevTlDO6+RxKs4JnNdJLVV1vMcgSTqBneinniRJx5lBIUnqZVA0J8OfCknyZJKHkjyYZKzVzkqyO8m+9rio1ZPk+jafvUnO6+xnc2u/L8nmTv38tv/x1jfzNI9tSQ4lebhTm/d5THWMeZ7Xp5McaK/Zg0ku6Wy7po3xsSQXdepDfxbbTR33tfrt7QYPkpzeno+37SvncE4rktyb5NEkjyT5eKuf1K9Xz7xO6tdr3lTVq35hcKH8ceDNwGnA3wKrj/e4hozzSeDsSbU/A7a29a3A59v6JcDdQIB1wH2tfhbwRHtc1NYXtW3fb23T+l48T/N4D3Ae8PAo5zHVMeZ5Xp8G/tOQtqvbz9npwDnt529B388icAewsa1/FfiPbf1K4KttfSNw+xzOaSlwXlt/A/CTNvaT+vXqmddJ/XrN13LcB3AiLMDvArs6z68Brjne4xoyzif5zaB4DFja1pcCj7X1rwGXT24HXA58rVP/WqstBX7cqR/Vbh7mspKj31DnfR5THWOe5zXVG89RP2MM7uz73al+Ftub6M+AhZN/Zif6tvWFrV3m6XW7C3j/qfJ6DZnXKfV6zdXiqaeBYX8qZNlxGkufAr6b5IEM/mwJwJKqOtjWnwGWtPWp5tRX3z+kPiqjmMdUx5hvV7fTMNs6p09e6bzeCPy8ql6eVD9qX237C639nGqnSN4O3Mcp9HpNmhecIq/XXDIoTi7vqqrzGPw13auSvKe7sQb/RDnp73cexTxG+N/qBuAtwBrgIPCFERxzziV5PXAn8ImqerG77WR+vYbM65R4veaaQTFwUvypkKo60B4PAd9h8Nd1n02yFKA9HmrNp5pTX335kPqojGIeUx1j3lTVs1X1q6r6NfB1Bq8ZvPJ5PQecmWThpPpR+2rbf6u1nxNJXsPgzfQbVfXtVj7pX69h8zoVXq/5YFAMnPB/KiTJ65K8YWIduBB4mME4J+4g2czgXCutvqndhbIOeKF9jN8FXJhkUftYfSGDc6cHgReTrGt3nWzq7GsURjGPqY4xbybe6JoPMHjNJsaysd0Bcw6wisFF3aE/i+1f1PcClw0Zf3delwF/1drPxfgD3AT8qKq+2Nl0Ur9eU83rZH+95s3xvkhyoiwM7tb4
CYM7GD51vMczZHxvZnBHxd8Cj0yMkcG5zXuAfcD/BM5q9TD40qfHgYeAtZ19/SEw3pYPd+prGfyP8TjwX5m/C6K3MvhY/w8Mzt1eMYp5THWMeZ7Xn7dx72XwBrG00/5TbYyP0bnDbKqfxfYz8P02378ATm/1f9qej7ftb57DOb2LwSmfvcCDbbnkZH+9euZ1Ur9e87X4JzwkSb089SRJ6mVQSJJ6GRSSpF4GhSSpl0EhSeplUEiSehkUkqRe/w/lLuE/1q/o4QAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.hist(train_df.text_len, bins=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1c742f62",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "80423758",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "bb0e1d23",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>text</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5399 3117 1070 4321 4568 2621 5466 3772 4516 2...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2491 4109 1757 7539 648 3695 3038 4490 23 7019...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2673 5076 6835 2835 5948 5677 3247 4124 2465 5...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4562 4893 2210 4761 3659 1324 2595 5949 4583 2...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4269 7134 2614 1724 4464 1324 3370 3370 2106 2...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                text\n",
       "0  5399 3117 1070 4321 4568 2621 5466 3772 4516 2...\n",
       "1  2491 4109 1757 7539 648 3695 3038 4490 23 7019...\n",
       "2  2673 5076 6835 2835 5948 5677 3247 4124 2465 5...\n",
       "3  4562 4893 2210 4761 3659 1324 2595 5949 4583 2...\n",
       "4  4269 7134 2614 1724 4464 1324 3370 3370 2106 2..."
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_df = pd.read_csv('./datasets/test_a.csv', sep='\\t')\n",
    "test_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "63b6fc7b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:xlabel='label', ylabel='Count'>"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZEAAAEGCAYAAACkQqisAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAaoElEQVR4nO3dcZAW9Z3n8fcnjBgTQsD4HMXN4A63TiWgd0GdKIl7WwY3OGgukCs3h5cLczk2eCXumbtUVrj9w2wSt2LtJmbdQ1eirLjrSThiClYxhEOSVK5WZIgEBPRmxLjMHMqsoIZNrWbc7/3x/EYfh2eYh2b66XmYz6uqa7q//evubyPM1+5f968VEZiZmWXxrqITMDOzxuUiYmZmmbmImJlZZi4iZmaWmYuImZll1lR0AvV23nnnRWtra9FpmJk1lF27dv19RJSGxsddEWltbaWrq6voNMzMGoqkF6rFc7+dJWmCpKckPZKWZ0raIalH0nclTUzxs9NyT1rfWrGPlSn+rKSrK+IdKdYjaUXe52JmZu9Ujz6Rm4EDFcu3A3dExAXAMWBpii8FjqX4HakdkmYDi4ELgQ7grlSYJgCrgAXAbOD61NbMzOok1yIiqQW4Frg3LQuYB2xITdYCi9L8wrRMWn9Var8QWBcRr0fE80APcFmaeiLiYES8AaxLbc3MrE7yvhL5NvAHwD+l5Q8Ar0TEQFruBZrTfDNwCCCtfzW1fys+ZJvh4ieQtExSl6Su/v7+0zwlMzMblFsRkfRJ4EhE7MrrGLWKiNUR0R4R7aXSCQ8XmJlZRnk+nXUF8ClJ1wDvBiYDfwZMkdSUrjZagL7Uvg+YAfRKagLeD7xcER9Uuc1wcTMzq4PcrkQiYmVEtEREK+WO8ccj4rPAduC61KwT2JjmN6Vl0vrHozzE8CZgcXp6aybQBjwJ7ATa0tNeE9MxNuV1PmZmdqIi3hO5BVgn6evAU8B9KX4f8FeSeoCjlIsCEbFP0npgPzAALI+INwEk3QRsASYAayJiX13PxMxsnNN4+55Ie3t7ZHnZcGBggO7u7reW29raaGoad+9qmtk4JWlXRLQPjfu3YI26u7u5YdWjTCo1c7y/j3uWX8usWbOKTsvMrFAuIqdgUqmZydNbi07DzGzM8Ci+ZmaWmYuImZll5iJiZmaZuYiYmVlmLiJmZpaZi4iZmWXmImJmZpn5PZGC+U14M2tk/m1VML8Jb2aNzEVkDPCb8GbWqNwnYmZmmbmImJlZZi4iZmaWmYuImZll5iJiZmaZ5VZEJL1b0pOSfi5pn6Q/SvH7JT0vaXea5qS4JN0pqUfSHkmXVOyrU1J3mjor4pdK2pu2uVOS8jofMzM7UZ6P+L4OzIuI45LOAn4q6bG07ssRsWFI+wVAW5ouB+4GLpd0LnAr0A4EsEvSpog4ltp8AdgBbAY6gMcwM7O6yO1KJMqOp8Wz0nSyD7ovBB5I2z0BTJE0Hbga2BoRR1Ph2Ap0pHWTI+KJKH8o/gFgUV7nY2ZmJ8q1T0TSBEm7gSOUC8GOtOq2dMvqDklnp1gzcKhi894UO1m8t0rczMzqJNciEhFvRsQcoAW4TNJFwErgQ8BHgHOBW/LMAUDSMkldkrr6+/vzPpyZ2bhRl6ezIuIVYDvQERGH0y2r14G/BC5LzfqAGRWbtaTYyeItVeLVjr86Itojor1UKo3CGZmZGeT7dFZJ0pQ0fw7wCeCZ1JdBepJqEfB02mQTsCQ9pTUXeDUiDgNbgPmSpkqaCswHtqR1r0mam/a1BNiY1/mYmdmJ8nw6azqwVtIEysVqfUQ8IulxSSVAwG7gP6f2m4FrgB7gV8DnASLiqKSvATtTu69GxNE0fyNwP3AO5aey/GSWmVkd5VZEImIPcHGV+Lxh2gewfJh1a4A1VeJdwEWnl6mZmWXlN9bNzCwzFxEzM8vMRcTMzDJzETEzs8xcRMzMLDMXETMzy8xFxMzMMnMRMTOzzFxE
zMwsMxcRMzPLzEXEzMwycxExM7PMXETMzCyzPIeCtwINDAzQ3d391nJbWxtNTf7PbWajy79VzlDd3d3csOpRJpWaOd7fxz3Lr2XWrFlFp2VmZxgXkTPYpFIzk6e3Fp2GmZ3B3CdiZmaZuYiYmVlmuRURSe+W9KSkn0vaJ+mPUnympB2SeiR9V9LEFD87Lfek9a0V+1qZ4s9Kuroi3pFiPZJW5HUuZmZWXZ5XIq8D8yLiw8AcoEPSXOB24I6IuAA4BixN7ZcCx1L8jtQOSbOBxcCFQAdwl6QJkiYAq4AFwGzg+tTWzMzqJLciEmXH0+JZaQpgHrAhxdcCi9L8wrRMWn+VJKX4uoh4PSKeB3qAy9LUExEHI+INYF1qa2ZmdZJrn0i6YtgNHAG2As8Br0TEQGrSCzSn+WbgEEBa/yrwgcr4kG2Gi1fLY5mkLkld/f39o3BmZmYGOReRiHgzIuYALZSvHD6U5/FOksfqiGiPiPZSqVRECmZmZ6S6PJ0VEa8A24GPAlMkDb6f0gL0pfk+YAZAWv9+4OXK+JBthoubmVmd5Pl0VknSlDR/DvAJ4ADlYnJdatYJbEzzm9Iyaf3jEREpvjg9vTUTaAOeBHYCbelpr4mUO9835XU+ZmZ2ojzfWJ8OrE1PUb0LWB8Rj0jaD6yT9HXgKeC+1P4+4K8k9QBHKRcFImKfpPXAfmAAWB4RbwJIugnYAkwA1kTEvhzPx8zMhsitiETEHuDiKvGDlPtHhsb/EfjdYfZ1G3BblfhmYPNpJ2tmZpn4jXUzM8vMRcTMzDJzETEzs8xcRMzMLDMXETMzy8xFxMzMMnMRMTOzzFxEzMwsMxcRMzPLzEXEzMwycxExM7PMXETMzCwzFxEzM8vMRcTMzDJzETEzs8xcRMzMLDMXETMzy8xFxMzMMsutiEiaIWm7pP2S9km6OcW/IqlP0u40XVOxzUpJPZKelXR1RbwjxXokraiIz5S0I8W/K2liXudjZmYnyvNKZAD4UkTMBuYCyyXNTuvuiIg5adoMkNYtBi4EOoC7JE2QNAFYBSwAZgPXV+zn9rSvC4BjwNIcz8fMzIbIrYhExOGI+Fma/yVwAGg+ySYLgXUR8XpEPA/0AJelqSciDkbEG8A6YKEkAfOADWn7tcCiXE7GzMyqqkufiKRW4GJgRwrdJGmPpDWSpqZYM3CoYrPeFBsu/gHglYgYGBKvdvxlkrokdfX394/GKZmZGXUoIpImAd8DvhgRrwF3A78JzAEOA9/MO4eIWB0R7RHRXiqV8j6cmdm40ZTnziWdRbmAPBgRDwNExEsV678DPJIW+4AZFZu3pBjDxF8GpkhqSlcjle3NzKwO8nw6S8B9wIGI+FZFfHpFs08DT6f5TcBiSWdLmgm0AU8CO4G29CTWRMqd75siIoDtwHVp+05gY17nY2ZmJ8rzSuQK4HPAXkm7U+y/U366ag4QwC+AGwAiYp+k9cB+yk92LY+INwEk3QRsASYAayJiX9rfLcA6SV8HnqJctMzMrE5yKyIR8VNAVVZtPsk2twG3VYlvrrZdRByk/PSWmZkVwG+sm5lZZi4iZmaWmYuImZll5iJiZmaZuYiYmVlmLiJmZpZZTUVE0hW1xMzMbHyp9Urkz2uMmZnZOHLSlw0lfRT4GFCS9N8qVk2m/Pa4jXMDAwN0d3e/tdzW1kZTU65DspnZGDLSv/aJwKTU7n0V8dd4e8wqG8e6u7u5YdWjTCo1c7y/j3uWX8usWbOKTsvM6uSkRSQifgz8WNL9EfFCnXKyBjOp1Mzk6a1Fp2FmBaj1vsPZklYDrZXbRMS8PJIyM7PGUGsR+V/AXwD3Am/ml46ZmTWSWovIQETcnWsmZmbWcGp9xPdvJN0oabqkcwenXDMzM7Mxr9Yrkc7088sVsQD+xeimY2ZmjaSmIhIRM/NOxMzMGk+tw54sqTaNsM0MSdsl7Ze0T9LNKX6upK2SutPPqSkuSXdK6pG0R9IlFfvqTO27JXVWxC+V
tDdtc2f6rruZmdVJrX0iH6mY/jXwFeBTI2wzAHwpImYDc4HlkmYDK4BtEdEGbEvLAAuAtjQtA+6GctEBbgUup/wp3FsHC09q84WK7TpqPB8zMxsFtd7O+v3KZUlTgHUjbHMYOJzmfynpANAMLASuTM3WAj8CbknxByIigCckTZE0PbXdGhFH07G3Ah2SfgRMjognUvwBYBHwWC3nZGZmpy/rUPD/ANTcTyKpFbgY2AFMSwUG4EVgWppvBg5VbNabYieL91aJVzv+Mkldkrr6+/trTdvMzEZQ05WIpL+h/DQWlAdenAWsr3HbScD3gC9GxGuV3RYREZJi2I1HSUSsBlYDtLe35348M7PxotZHfP+0Yn4AeCEieodrPEjSWZQLyIMR8XAKvyRpekQcTrerjqR4HzCjYvOWFOvj7dtfg/EfpXhLlfZmZlYnNd3OSgMxPkN5JN+pwBsjbZOelLoPOBAR36pYtYm33zvpBDZWxJekp7TmAq+m215bgPmSpqYO9fnAlrTuNUlz07GWVOzLzMzqoNZHfD8DPAn8LvAZYIekkYaCvwL4HDBP0u40XQN8A/iEpG7gd9IywGbgINADfAe4ESB1qH8N2Jmmrw52sqc296ZtnsOd6mZmdVXr7aw/BD4SEUcAJJWA/w1sGG6DiPgpMNx7G1dVaR/A8mH2tQZYUyXeBVw0UvJmZpaPWp/OetdgAUlePoVtzczsDFXrlcgPJG0BHkrL/47y7SczMxvHRvrG+gWU3+v4sqR/C/xWWvW3wIN5J2dmZmPbSFci3wZWAqRHdB8GkPQv07p/k2NuZmY2xo3UrzEtIvYODaZYay4ZmZlZwxipiEw5ybpzRjEPMzNrQCMVkS5JXxgalPR7wK58UjIzs0YxUp/IF4HvS/osbxeNdmAi8Okc8zIzswZw0iISES8BH5P0cd5+qe/RiHg898zMzGzMq/V7ItuB7TnnYmZmDcZvnZuZWWYuImZmlpmLiJmZZeYiYmZmmbmImJlZZi4iZmaWmYuImZll5iJiZmaZ5VZEJK2RdETS0xWxr0jqG/LN9cF1KyX1SHpW0tUV8Y4U65G0oiI+U9KOFP+upIl5nYuZmVWX55XI/UBHlfgdETEnTZsBJM0GFgMXpm3ukjRB0gRgFbAAmA1cn9oC3J72dQFwDFia47mYmVkVuRWRiPgJcLTG5guBdRHxekQ8D/QAl6WpJyIORsQbwDpgoSQB84ANafu1wKLRzN+KNTAwwIEDB96aBgYGik7JzKoook/kJkl70u2uqSnWDByqaNObYsPFPwC8EhEDQ+JVSVomqUtSV39//2idh+Wou7ubG1Y9ypfW7+aGVY/S3d1ddEpmVkW9i8jdwG8Cc4DDwDfrcdCIWB0R7RHRXiqV6nFIGwWTSs1Mnt7KpNKw/39gZgWraRTf0ZKGlgdA0neAR9JiHzCjomlLijFM/GVgiqSmdDVS2d7MzOqkrlcikqZXLH4aGHxyaxOwWNLZkmYCbcCTwE6gLT2JNZFy5/umiAjKQ9Nfl7bvBDbW4xzMzOxtuV2JSHoIuBI4T1IvcCtwpaQ5QAC/AG4AiIh9ktYD+4EBYHlEvJn2cxOwBZgArImIfekQtwDrJH0deAq4L69zMTOz6nIrIhFxfZXwsL/oI+I24LYq8c3A5irxg5Sf3jIzs4L4jXUzM8vMRcTMzDJzETEzs8xcRMzMLDMXETMzy8xFxMzMMnMRMTOzzFxEzMwss7qOnWVWtIGBgXeMCNzW1kZTk/8ZmGXlfz02rgwOMT+p1Mzx/j7uWX4ts2bNKjots4blImLjzuAQ82Z2+twnYmZmmbmImJlZZi4iZmaWmYuImZll5iJiZmaZuYiYmVlmuRURSWskHZH0dEXsXElbJXWnn1NTXJLulNQjaY+kSyq26UztuyV1VsQvlbQ3bXOnJOV1LmZmVl2eVyL3Ax1DYiuAbRHRBmxLywALgLY0LQPuhnLRofxt9sspfwr31sHCk9p8oWK7occy
M7Oc5VZEIuInwNEh4YXA2jS/FlhUEX8gyp4ApkiaDlwNbI2IoxFxDNgKdKR1kyPiiYgI4IGKfZmZWZ3U+431aRFxOM2/CExL883AoYp2vSl2snhvlbhZ4YaOzwUeo8vOXIX9rY6IkBT1OJakZZRvk3H++efX45A2jlWOzwV4jC47o9X76ayX0q0o0s8jKd4HzKho15JiJ4u3VIlXFRGrI6I9ItpLpdJpn4TZSAbH55o8vfWtYmJ2Jqp3EdkEDD5h1QlsrIgvSU9pzQVeTbe9tgDzJU1NHerzgS1p3WuS5qanspZU7MvMzOokt9tZkh4CrgTOk9RL+SmrbwDrJS0FXgA+k5pvBq4BeoBfAZ8HiIijkr4G7EztvhoRg531N1J+Auwc4LE0mZlZHeVWRCLi+mFWXVWlbQDLh9nPGmBNlXgXcNHp5GhmZqfHj4uYNRB/mdHGGv/tM2sg/jKjjTUuImYNxl9mtLHEAzCamVlmLiJmZpaZb2eZ2Tu4895Ohf9mmNk7uPPeToWLiJmdwJ33Viv3iZiZWWYuImZmlpmLiJmZZeYiYmZmmbmImJlZZi4iZmaWmYuImZll5iJiZmaZ+WVDM6sLD6dyZvJ/QTOrCw+ncmYq5HaWpF9I2itpt6SuFDtX0lZJ3enn1BSXpDsl9UjaI+mSiv10pvbdkjqLOBczq93gcCqTSs1Fp2KjpMg+kY9HxJyIaE/LK4BtEdEGbEvLAAuAtjQtA+6GctEBbgUuBy4Dbh0sPGZmVh9jqWN9IbA2za8FFlXEH4iyJ4ApkqYDVwNbI+JoRBwDtgIddc7ZzGxcK6pPJIAfSgrgnohYDUyLiMNp/YvAtDTfDByq2LY3xYaLn0DSMspXMZx//vmjdQ5mNgYM7bAHd9rXU1F/yr8VEX2S/hmwVdIzlSsjIlKBGRWpSK0GaG9vH7X9mlnxKjvsAXfa11khRSQi+tLPI5K+T7lP4yVJ0yPicLpddSQ17wNmVGzekmJ9wJVD4j/KOXUzG4P8/ZPi1L1PRNJ7Jb1vcB6YDzwNbAIGn7DqBDam+U3AkvSU1lzg1XTbawswX9LU1KE+P8XMzKxOirgSmQZ8X9Lg8f9nRPxA0k5gvaSlwAvAZ1L7zcA1QA/wK+DzABFxVNLXgJ2p3Vcj4mj9TsPMzOpeRCLiIPDhKvGXgauqxANYPsy+1gBrRjtHMzOrzVh6xNfMzBqMi4iZmWXmImJmZpn5bRwzsxp4FOLq/CdgZlYDj0JcnYuImVmN/FLjidwnYmZmmbmImJlZZi4iZmaWmYuImZll5o51M7MCNfqjw42TqZnZGajRHx12ETEzK1gjPzrsPhEzM8vMRcTMzDLz7SwzszNQvTrsXUTMzM5A9eqwdxExMztD1aPDvuH7RCR1SHpWUo+kFUXnY2Y2njR0EZE0AVgFLABmA9dLml1sVmZm40ej3866DOiJiIMAktYBC4H9eRzseH/fWz+fe+59o7LP5557rqH2W6/jNNp+hzvGaB+n3vk3Qu55/nkPd5xG+3Mp/5wzKvsdShGRy47rQdJ1QEdE/F5a/hxweUTcNKTdMmBZWvwg8GzGQ54H/H3GbYvm3OuvUfMG516UsZz7b0REaWiw0a9EahIRq4HVp7sfSV0R0T4KKdWdc6+/Rs0bnHtRGjH3hu4TAfqAGRXLLSlmZmZ10OhFZCfQJmmmpInAYmBTwTmZmY0bDX07KyIGJN0EbAEmAGsiYl+OhzztW2IFcu7116h5g3MvSsPl3tAd62ZmVqxGv51lZmYFchExM7PMXERq0KhDq0iaIWm7pP2S9km6ueicTpWkCZKekvRI0bmcCklTJG2Q9IykA5I+WnROtZL0X9Pfl6clPSTp3UXnNBxJayQdkfR0RexcSVsldaefU4vMcTjD5P4n6e/MHknflzSlwBRr4iIyggYfWmUA+FJEzAbmAssbKPdBNwMHik4igz8DfhARHwI+TIOcg6Rm4L8A7RFxEeUHVhYXm9VJ3Q90DImt
ALZFRBuwLS2PRfdzYu5bgYsi4l8B/xdYWe+kTpWLyMjeGlolIt4ABodWGfMi4nBE/CzN/5LyL7LmYrOqnaQW4Frg3qJzORWS3g/8NnAfQES8ERGvFJrUqWkCzpHUBLwH+H8F5zOsiPgJcHRIeCGwNs2vBRbVM6daVcs9In4YEQNp8QnK776NaS4iI2sGDlUs99JAv4gHSWoFLgZ2FJzKqfg28AfAPxWcx6maCfQDf5luxd0r6b1FJ1WLiOgD/hT4O+Aw8GpE/LDYrE7ZtIg4nOZfBKYVmcxp+E/AY0UnMRIXkXFA0iTge8AXI+K1ovOphaRPAkciYlfRuWTQBFwC3B0RFwP/wNi9pfIOqf9gIeVC+M+B90r6D8VmlV2U32FouPcYJP0h5dvRDxady0hcREbW0EOrSDqLcgF5MCIeLjqfU3AF8ClJv6B8C3GepL8uNqWa9QK9ETF41beBclFpBL8DPB8R/RHxa+Bh4GMF53SqXpI0HSD9PFJwPqdE0n8EPgl8NhrgRT4XkZE17NAqkkT5vvyBiPhW0fmciohYGREtEdFK+c/88YhoiP8jjogXgUOSPphCV5HT5wly8HfAXEnvSX9/rqJBHgqosAnoTPOdwMYCczklkjoo38L9VET8quh8auEiMoLUyTU4tMoBYH3OQ6uMpiuAz1H+v/jdabqm6KTGid8HHpS0h/KHHP642HRqk66eNgA/A/ZS/h0xZofikPQQ8LfAByX1SloKfAP4hKRuyldW3ygyx+EMk/v/AN4HbE3/Xv+i0CRr4GFPzMwsM1+JmJlZZi4iZmaWmYuImZll5iJiZmaZuYiYmVlmLiJmOZJ0fIT1rZWjuNa4z/slXXd6mZmNDhcRMzPLzEXErA4kTZK0TdLPJO2VVDkSdJOkB9N3RzZIek/a5lJJP5a0S9KWwaE8zMYSFxGz+vhH4NMRcQnwceCbaVgRgA8Cd0XELOA14MY05tmfA9dFxKXAGuC2AvI2O6mmohMwGycE/LGk36Y8tH0zbw9Rfigi/k+a/2vKH4X6AXAR5eEvoPxxqMOYjTEuImb18VmgBFwaEb9OoxMPfnZ26NhDQbno7IuIhvmsro1Pvp1lVh/vp/x9lF9L+jjwGxXrzq/4Bvu/B34KPAuUBuOSzpJ0YV0zNquBi4hZfTwItEvaCywBnqlY9yywXNIBYCrlj1m9AVwH3C7p58BuGu+7HjYOeBRfMzPLzFciZmaWmYuImZll5iJiZmaZuYiYmVlmLiJmZpaZi4iZmWXmImJmZpn9fzhM0TY71NkZAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.histplot(train_df.label)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dacc64af",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Passing the raw `text` strings to histplot makes seaborn treat every\n",
    "# distinct string as its own category; drawing that many bars and tick\n",
    "# labels does not finish in reasonable time (the previous run had to be\n",
    "# interrupted). Plot the number of whitespace-separated tokens per row\n",
    "# instead, which is a numeric quantity a histogram can summarise.\n",
    "text_token_counts = train_df.text.str.split().str.len()\n",
    "ax = sns.histplot(text_token_counts)\n",
    "ax.set(xlabel='tokens per text', ylabel='Count', title='Distribution of text length');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "91876953",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c3576585",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2487341e",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
